from datetime import datetime
from sklearn.model_selection import train_test_split, GridSearchCV, learning_curve
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.linear_model import Lasso, LinearRegression, Ridge, BayesianRidge, Perceptron, LassoLarsCV, LassoLarsIC, LassoCV, RidgeCV, ElasticNetCV, ARDRegression, BayesianRidge, HuberRegressor
from sklearn.kernel_ridge import KernelRidge
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor, ExtraTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.neural_network import MLPRegressor
from log_tool import logTool

# Hyper-parameter placeholders. Nothing in the visible script reads them; the
# commented-out MODEL_INFOR template below is the only place that refers to
# values of this kind.
para_kernel, para_alpha, para_activation = "rbf", 0, "logistic"

# Input data locations.
INPUT_PATH = "../data/train_processed.csv"
INPUT_TEST_PATH = "../data/test_processed.csv"

# Log destinations: LOG_PATH1 backs the progress logger, LOG_PATH2 the
# result logger.
LOG_PATH1 = "../data/class_log"
LOG_PATH2 = "../data/result_log"

# One prediction file per run, named after the moment the script started.
# NOTE(review): "reault" looks like a typo for "result", and str(datetime)
# contains spaces and colons, which some filesystems reject. Left untouched
# so existing output names stay stable - confirm before renaming.
_run_started = datetime.now()
OUTPUT_PATH = "../data/model/reault%s.csv" % (_run_started,)

# Free-text description of the experiment, written to both logs.
# Previous template, kept for reference:
# MODEL_INFOR = "data formulated-one, MLP base, \npara: para_hidden_layer_sizes: %s, para_activation: %s, para_alpha: %s" % (
#     para_hidden_layer_sizes, para_activation, para_alpha)
MODEL_INFOR = "data formulated-one, MLP base SVR"


# `log` records progress messages; `log_res` records the output path, the
# model description and the evaluation metrics.
log = logTool(LOG_PATH1)
log_res = logTool(LOG_PATH2)
for _header in (OUTPUT_PATH, MODEL_INFOR):
    log_res.info(_header)

# Load the training set. Every comma-separated field is parsed as a float;
# column 1 is the regression target and columns 2 onward are the features
# (column 0 is not used).
feature = []
label = []
with open(INPUT_PATH) as f:
    for line in f:
        line = line.strip()
        if not line:
            # A blank line is not a sample; without this guard float("")
            # raises ValueError and aborts the whole run.
            continue
        ll = [float(item) for item in line.split(",")]
        feature.append(ll[2:])
        label.append(ll[1])
log.info("data import completed")
# Number of training samples that were read.
print(len(feature))

log.info("-----------------model selection stage--------------------")
log.info(MODEL_INFOR)
log.info("start training...")
# Hold out 33% of the training rows for validation; the fixed random_state
# keeps the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    feature, label, test_size=0.33, random_state=5)
# NOTE(review): criterion='mae' is deprecated in recent scikit-learn releases
# and is believed to be rejected from 1.1 onward - confirm against the
# installed version before upgrading. Kept as-is so the model is unchanged.
clf = GradientBoostingRegressor(
    n_estimators=100, max_depth=2, criterion='mae', loss='huber')
clf.fit(X_train, y_train)
y_predict = clf.predict(X_test)
log.info("finish training")

log.info("evaluating...")
# scikit-learn metrics are declared as metric(y_true, y_pred). MSE and MAE
# are symmetric, so passing the ground truth first does not change the
# numbers; it only makes these calls consistent with r2_score below.
MSE = mean_squared_error(y_test, y_predict)
MAE = mean_absolute_error(y_test, y_predict)
R2 = r2_score(y_test, y_predict)
result_rep = "MSE: %s, MAE: %s, R2: %s" % (MSE, MAE, R2)
log.info(result_rep)
log_res.info(result_rep)


log.info("-----------------prediction stage--------------------")
log.info("start training...")
# Refit a fresh estimator on every training row (no hold-out this time); the
# resulting `clf` is the model used afterwards to score the test file.
_final_params = {
    "n_estimators": 100,
    "max_depth": 2,
    "criterion": 'mae',
    "loss": 'huber',
}
clf = GradientBoostingRegressor(**_final_params)
clf.fit(feature, label)
y_pred = clf.predict(feature)
log.info("finish training")

log.info("evaluating...")
# These scores are computed on the same rows the model was fitted on, so
# they describe the fit to the training data rather than held-out error.
MSE, MAE, R2 = [
    _metric(label, y_pred)
    for _metric in (mean_squared_error, mean_absolute_error, r2_score)
]
result_rep = "MSE: %s, MAE: %s, R2: %s" % (MSE, MAE, R2)
# Report to the progress log first, then to the result log.
for _sink in (log, log_res):
    _sink.info(result_rep)

log.info("output result file")
# Parse the test set: every field is a float, column 0 is dropped and the
# remaining columns are the features. `feature` is rebound here; the training
# features are not read again below.
feature = []
with open(INPUT_TEST_PATH) as f:
    for line in f:
        line = line.strip()
        if not line:
            # Blank lines carry no sample; skipping them avoids float("")
            # raising ValueError. Output indices count parsed rows only.
            continue
        ll = [float(item) for item in line.split(",")]
        feature.append(ll[1:])
y_output = clf.predict(feature)

# Write "time,prediction" followed by one "<1-based index>,<value>" row per
# prediction, with no trailing newline. The context manager closes the file
# even if a write fails part-way through.
with open(OUTPUT_PATH, 'w') as fout:
    fout.write("time,prediction")
    fout.writelines(
        "\n" + str(index) + "," + str(res)
        for index, res in enumerate(y_output, start=1))

log.info("-----------------completed--------------------")
